/*
 * Archive-listing residue captured with this file (not part of the
 * original source):
 *
 *   home *** CD-ROM | disk | FTP | other *** search
 *   / Collection of Internet / Collection of Internet.iso / infosrvr
 *   / dev / libhtml_.tar / MIFwriter.c
 *   < prev   next >   Wrap
 *   C/C++ Source or Header | 1993-01-21 | 15KB | 730 lines
 */
/* MIFwriter.c -- MIF output support for WWW
* $Id: SGMLmain.c,v 1.3 93/01/06 18:40:27 connolly Exp Locker: connolly $
*/
/* implements ... */
#include "MIFwriter.h"
/* uses ... */
#include "SGML.h"
#include "HTParse.h"
#include "HTMLdtd.h"
#include <stdio.h>
#include <string.h>
#include "object.h"
#include "debug.h"
/* One entry of the writer's stack of open elements (see MIF.stack). */
typedef struct{
/* Element name (generic identifier), NUL-terminated; copied in by start_tag(). */
char gi[SGML_NAMELEN + 1];
/* Content code (an SGML_* value) that start_tag() returned for this element;
 * end_tag() compares it against SGML_RCDATA and SGML_CDATA. */
int content;
}Element;
/* State of one MIF writer. MIFwriter_new() returns it cast to HMDoc*, and
 * the callbacks in this file cast the HMDoc* they receive back to MIF*. */
typedef struct{
/* Stream that the MIF text is written to. */
FILE* out;
/* Open elements; entries 0 .. taglvl-1 are in use. */
Element stack[SGML_TAGLVL];
/* Non-zero while in PRE/XMP/LISTING: data() then turns each newline into a
 * HardReturn and each space into a HardSpace. */
int literal;
/* Number of entries in use in stack[]. */
int taglvl;
/* Set after an ordinary character, cleared after a space or tab; when set,
 * data() replaces a newline in non-literal text with a single space. */
int needspace;
int empty; /* current paragraph is empty */
/* Which MIF construct the output is currently inside of. */
enum {
/* top level of the file, nothing open */
MIFFile,
/* inside <VariableFormats; inside the open string of a <VariableDef */
VariableFormats, VariableDef,
/* inside <TextFlow between paragraphs; inside <Para/<ParaLine; after a
 * <Font ...> change emitted for an inline element */
TextFlow, ParaLine, Font
}state;
}MIF;
/* STATE(m, s, l, p) -- switch writer m to output state s.
 *
 *   m  pointer to the writer (has state, literal, empty, needspace members)
 *   s  new value for m->state
 *   l  new value for m->literal
 *   p  non-zero when a new paragraph context begins: m->empty is set to 1
 *      and m->needspace is cleared; zero leaves both untouched
 *
 * The macro is a single expression, so it can be used as a statement.
 * Every argument is parenthesized in the expansion.
 */
#define STATE(m, s, l, p) ((m)->state=(s),(m)->literal=(l), \
(p) ? ((m)->empty=1,(m)->needspace=0) : 0)
/* Forward declarations of this file's callbacks, declared through the
 * procedure typedefs (presumably supplied by the included project headers
 * -- TODO confirm which one). */
static HMStartTagProc start_tag;
static HMEndTagProc end_tag;
static HMDataProc data;
static HMFileWriterProc MIFwriter_new;
static HMDeleteProc MIFwriter_dt;
/* Helper that writes a hypertext <Marker for an anchor's NAME/HREF. */
static VOID
marker PARAMS((MIF* m,
CONST HMBinding *attributes,
int nattrs));
/* The exported class record for the MIF writer. It lists the constructor,
 * a 0 in the second slot, the destructor, the start-tag, end-tag and data
 * callbacks, and html_entity_text (defined elsewhere). The meaning of each
 * slot is fixed by the HMDoc_Class declaration, which is not in this file. */
HMDoc_Class MIFwriter = {MIFwriter_new, 0, MIFwriter_dt,
start_tag, end_tag, data, html_entity_text};
/* Translation table for character codes 160..255: entry i holds the code
 * that data() prints (as \xNN) for input character 160+i. The comment on
 * each row gives the input code and its glyph name.
 * NOTE(review): entries of 0x00 presumably mark characters with no
 * counterpart in the target (Frame) character set -- confirm; data() prints
 * them as \x00 like any other entry. */
static int FrameEncoding[] =
{
/* 160 /space -> */ 0x20,
/* 161 /exclamdown -> */ 0xc1,
/* 162 /cent -> */ 0xa2,
/* 163 /sterling -> */ 0xa3,
/* 164 /currency -> */ 0xdb,
/* 165 /yen -> */ 0xb4,
/* 166 /brokenbar -> */ 0x00,
/* 167 /section -> */ 0xa4,
/* 168 /dieresis -> */ 0xac,
/* 169 /copyright -> */ 0xa9,
/* 170 /ordfeminine -> */ 0xbb,
/* 171 /guillemotleft -> */ 0xc7,
/* 172 /logicalnot -> */ 0xc2,
/* 173 /hyphen -> */ 0x2d,
/* 174 /registered -> */ 0xa8,
/* 175 /macron -> */ 0xf8,
/* 176 /degree -> */ 0x00,
/* 177 /plusminus -> */ 0x00,
/* 178 /twosuperior -> */ 0x00,
/* 179 /threesuperior -> */ 0x00,
/* 180 /acute -> */ 0xab,
/* 181 /mu -> */ 0x00,
/* 182 /paragraph -> */ 0xa6,
/* 183 /periodcentered -> */ 0xe1,
/* 184 /cedilla -> */ 0xfc,
/* 185 /onesuperior -> */ 0x00,
/* 186 /ordmasculine -> */ 0xbc,
/* 187 /guillemotright -> */ 0xc8,
/* 188 /onequarter -> */ 0x00,
/* 189 /onehalf -> */ 0x00,
/* 190 /threequarters -> */ 0x00,
/* 191 /questiondown -> */ 0xc0,
/* 192 /Agrave -> */ 0xcb,
/* 193 /Aacute -> */ 0xe7,
/* 194 /Acircumflex -> */ 0xe5,
/* 195 /Atilde -> */ 0xcc,
/* 196 /Adieresis -> */ 0x80,
/* 197 /Aring -> */ 0x81,
/* 198 /AE -> */ 0xae,
/* 199 /Ccedilla -> */ 0x82,
/* 200 /Egrave -> */ 0xe9,
/* 201 /Eacute -> */ 0x83,
/* 202 /Ecircumflex -> */ 0xe6,
/* 203 /Edieresis -> */ 0xe8,
/* 204 /Igrave -> */ 0xed,
/* 205 /Iacute -> */ 0xea,
/* 206 /Icircumflex -> */ 0xeb,
/* 207 /Idieresis -> */ 0xec,
/* 208 /Eth -> */ 0x00,
/* 209 /Ntilde -> */ 0x84,
/* 210 /Ograve -> */ 0xf1,
/* 211 /Oacute -> */ 0xee,
/* 212 /Ocircumflex -> */ 0xef,
/* 213 /Otilde -> */ 0xcd,
/* 214 /Odieresis -> */ 0x85,
/* 215 /multiply -> */ 0x00,
/* 216 /Oslash -> */ 0xaf,
/* 217 /Ugrave -> */ 0xf4,
/* 218 /Uacute -> */ 0xf2,
/* 219 /Ucircumflex -> */ 0xf3,
/* 220 /Udieresis -> */ 0x86,
/* 221 /Yacute -> */ 0x00,
/* 222 /Thorn -> */ 0x00,
/* 223 /germandbls -> */ 0xa7,
/* 224 /agrave -> */ 0x88,
/* 225 /aacute -> */ 0x87,
/* 226 /acircumflex -> */ 0x89,
/* 227 /atilde -> */ 0x8b,
/* 228 /adieresis -> */ 0x8a,
/* 229 /aring -> */ 0x8c,
/* 230 /ae -> */ 0xbe,
/* 231 /ccedilla -> */ 0x8d,
/* 232 /egrave -> */ 0x8f,
/* 233 /eacute -> */ 0x8e,
/* 234 /ecircumflex -> */ 0x90,
/* 235 /edieresis -> */ 0x91,
/* 236 /igrave -> */ 0x93,
/* 237 /iacute -> */ 0x92,
/* 238 /icircumflex -> */ 0x94,
/* 239 /idieresis -> */ 0x95,
/* 240 /eth -> */ 0x00,
/* 241 /ntilde -> */ 0x96,
/* 242 /ograve -> */ 0x98,
/* 243 /oacute -> */ 0x97,
/* 244 /ocircumflex -> */ 0x99,
/* 245 /otilde -> */ 0x9b,
/* 246 /odieresis -> */ 0x9a,
/* 247 /divide -> */ 0x00,
/* 248 /oslash -> */ 0xbf,
/* 249 /ugrave -> */ 0x9d,
/* 250 /uacute -> */ 0x9c,
/* 251 /ucircumflex -> */ 0x9e,
/* 252 /udieresis -> */ 0x9f,
/* 253 /yacute -> */ 0x00,
/* 254 /thorn -> */ 0x00,
/* 255 /ydieresis -> */ 0xd8,
};
/* mifwriter constructor */
static HMDoc*
MIFwriter_new(fp)
FILE* fp;
{
MIF* m = NEW(MIF, 1);
m->out = fp;
m->taglvl = 1;
strcpy(m->stack[0].gi, "HTML"); /* @@ fake tag minimization */
STATE(m, MIFFile, 0, 1);
fprintf(m->out,
"<MIFFile 3.00> # Generated by html2mif\n"
);
return (HMDoc*)m;
}
/* MIFwriter_dt -- MIF writer destructor.
 *
 * Hands the writer object to FREE. Nothing else is done here: the output
 * stream is neither flushed nor closed, and no closing MIF text is written.
 */
static VOID
MIFwriter_dt(this)
HMDoc* this;
{
FREE(this);
}
static VOID
data(document, chars, nchars)
HMDoc* document;
CONST char* chars;
int nchars;
{
MIF* m = (MIF*)document;
Element* e = &m->stack[m->taglvl - 1];
CONST char* p;
debug(("<emptypar: %d 1st char: %d nchars: %d>\n",
m->empty, chars[0], nchars));
if(chars[0] == '\n' && nchars <2 &&
m->literal == 0 && m->empty)
return;
switch(m->state){
case MIFFile:
start_tag((HMDoc*)m, "BODY", 0, 0);
fprintf(m->out,
" <Para\n"
" <PgfTag `BODY'>\n"
" <ParaLine\n"
" <String `");
STATE(m, ParaLine, 0, 1);
break;
case TextFlow:
fprintf(m->out,
" <Para\n"
" <PgfTag `%s'>\n"
" <ParaLine\n"
" <String `", e->gi);
STATE(m, ParaLine, 0, 1);
break;
case VariableFormats:
/* in element content. Skip data */
return;
case VariableDef:
/* nothing */
break;
default:
fprintf(m->out,
" <String `");
}
for(p = chars; p-chars < nchars; p++){
if(*p != '\n')
m->empty = 0;
if(*p & 0x80){
int i = (*p & 0xFF) - 160;
if(i < 96) /* in ISOlat1 encoding? */
printf("\\x%02x ", FrameEncoding[i]);
}else
switch(*p){
case '\n':
if(m->literal)
fprintf(m->out,
"'>\n"
" <Char HardReturn>\n"
" > # End ParaLine\n"
" <ParaLine\n"
" <String `");
else if (m->needspace){
fprintf(m->out, " ");
m->needspace = 0;
}
break;
case '\r':
/* nothing */
break;
case '\t':
fprintf(m->out, "\\t");
m->needspace = 0;
break;
case '>':
fprintf(m->out, "\\>");
m->needspace = 1;
break;
case '\'':
fprintf(m->out, "\\q");
m->needspace = 1;
break;
case '`':
fprintf(m->out, "\\Q");
m->needspace = 1;
break;
case '\\':
fprintf(m->out, "\\\\");
m->needspace = 1;
break;
case ' ':
if(m->literal){
fprintf(m->out,
"'>\n"
" <Char HardSpace>\n"
" <String `");
}else{
m->needspace = 0;
fprintf(m->out, " ");
}
break;
default:
m->needspace = 1;
fprintf(m->out, "%c", *p);
}
}
fprintf(m->out, "'>\n");
}
#if 0
/* save this for insets */
/* Disabled code, never compiled: would write a named entity as a MIF
 * <Char name> statement and mark that a space is needed before a following
 * line break. As its own note says, it still lacks the preparation that
 * data() performs before emitting text. */
static VOID
entity(document, name)
HMDoc* document;
CONST char* name;
{
MIF* m = (MIF*)document;
/*@@ same prep work as data */
fprintf(m->out, " <Char %s>\n", name);
m->needspace = 1;
}
#endif
static VOID
marker(m, attributes, nattrs)
MIF* m;
CONST HMBinding *attributes;
int nattrs;
{
int i;
char* name = 0;
char* href = 0;
for(i = 0; i < nattrs; i++){
if(!strcmp(attributes[i].name, "NAME"))
name = attributes[i].value;
else if(!strcmp(attributes[i].name, "HREF"))
href = attributes[i].value;
}
if(href){
char* anchor = HTParse(href, "", PARSE_ANCHOR);
char* scheme = HTParse(href, "", PARSE_ACCESS);
char* path = HTParse(href, "", PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
fprintf(m->out,
" <Marker\n"
" <MType 8>\n");
if(scheme && *scheme)
fprintf(m->out,
" <MText `message www %s:%s#%s'>\n",
scheme, path, anchor);
else if(path && path[0] && path[1]){ /*@@ in case of just "/" */
if(anchor && *anchor)
fprintf(m->out,
" <MText `gotolink %s:%s'>\n",
path, anchor);
else
fprintf(m->out,
" <MText `gotolink %s:firstpage'>\n",
path);
}else
fprintf(m->out,
" <MText `gotolink %s'>\n",
anchor);
fprintf(m->out,
" > #End of Marker\n");
free(scheme);
free(path);
free(anchor);
}
else if (name){
fprintf(m->out,
" <Marker\n"
" <MType 8>\n"
" <MText `newlink %s'>\n"
" > #End of Marker\n",
name);
}
}
static int
start_tag(document, gi, attributes, nattrs)
HMDoc* document;
CONST char* gi;
CONST HMBinding attributes[];
int nattrs;
{
MIF* m = (MIF*)document;
Element* e = &m->stack[m->taglvl++];
int taglevel = -1;
m->needspace = 0;
strcpy(e->gi, gi);
debug(("stacking '%s'\n", gi));
if(!strcmp(gi, "H1") ||
!strcmp(gi, "H2") ||
!strcmp(gi, "H3") ||
!strcmp(gi, "H4") ||
!strcmp(gi, "H5") ||
!strcmp(gi, "H6") ||
!strcmp(gi, "PRE") ||
!strcmp(gi, "XMP") ||
!strcmp(gi, "LISTING") ||
!strcmp(gi, "ADDRESS") ||
!strcmp(gi, "BLOCKQUOTE") ||
!strcmp(gi, "UL") ||
!strcmp(gi, "OL") ||
!strcmp(gi, "MENU") ||
!strcmp(gi, "DIR") ||
!strcmp(gi, "DL")
)
taglevel = ParaLine;
else
if(!strcmp(gi, "A") ||
!strcmp(gi, "EM") ||
!strcmp(gi, "TT") ||
!strcmp(gi, "STRONG") ||
!strcmp(gi, "B") ||
!strcmp(gi, "I") ||
!strcmp(gi, "U") ||
!strcmp(gi, "CODE") ||
!strcmp(gi, "SAMP") ||
!strcmp(gi, "KBD") ||
!strcmp(gi, "KEY") ||
!strcmp(gi, "VAR") ||
!strcmp(gi, "DFN") ||
!strcmp(gi, "CITE"))
taglevel = Font;
while(1){
switch(m->state){
case MIFFile:
if(!strcmp(gi, "BODY")){
fprintf(m->out, "<TextFlow\n");
STATE(m, TextFlow, 0, 1);
return e->content = SGML_MIXED;
}
else if(!strcmp(gi, "HEAD")){
return e->content = SGML_ELEMENT;
}
else if(!strcmp(gi, "TITLE")){
fprintf(m->out,
"<VariableFormats\n"
" <VariableFormat\n"
" <VariableName `Title'>\n"
" <VariableDef `"
);
STATE(m, VariableDef, 0, 1);
return e->content = SGML_RCDATA; /*@@ CDATA? */
}
else if(!strcmp(gi, "ISINDEX")){
fprintf(m->out,
"<VariableFormats\n"
" <VariableFormat\n"
" <VariableName `Index'>\n"
" <VariableDef `True'>\n"
" >\n"
);
STATE(m, VariableFormats, 0, 1);
m->taglvl--;
return SGML_EMPTY;
}
else if(taglevel == ParaLine || taglevel == Font){
start_tag((HMDoc*)m, "BODY", 0, 0);
}
else{
debug(("'%s' out of context in state %d", gi, m->state));
m->taglvl--;
return SGML_EMPTY;
}
break;
case VariableFormats:
if(!strcmp(gi, "TITLE")){
fprintf(m->out,
" <VariableFormat\n"
" <VariableName `Title'>\n"
" <VariableDef `"
);
STATE(m, VariableDef, 0, 1);
return e->content = SGML_RCDATA; /*@@ CDATA? */
}
else if(!strcmp(gi, "ISINDEX")){
fprintf(m->out,
" <VariableFormat\n"
" <VariableName `Index'>\n"
" <VariableDef `True'>\n"
" >\n"
);
m->taglvl--;
return SGML_EMPTY;
}
else{
fprintf(m->out,
" > #End of VariableFormats\n");
STATE(m, MIFFile, 0, 1);
}
break;
case TextFlow:
if(!strcmp(gi, "PRE")){
fprintf(m->out,
" <Para\n"
" <PgfTag `%s'>\n"
" <ParaLine\n"
, gi);
STATE(m, ParaLine, 1, 1);
return e->content = SGML_MIXED;
}
else if(!strcmp(gi, "XMP") ||
!strcmp(gi, "LISTING")){
fprintf(m->out,
" <Para\n"
" <PgfTag `%s'>\n"
" <ParaLine\n"
, gi);
STATE(m, ParaLine, 1, 1);
return e->content = SGML_RCDATA;
}
else if(taglevel == ParaLine){
fprintf(m->out,
" <Para\n"
" <PgfTag `%s'>\n"
" <ParaLine\n"
, gi);
STATE(m, ParaLine, 0, 1);
return e->content = SGML_MIXED;
}
else if(taglevel == Font){
debug(("%s: transition from TextFlow to BODY ParaLine", gi));
fprintf(m->out,
" <Para\n"
" <PgfTag `BODY'>\n"
" <ParaLine\n");
STATE(m, ParaLine, 0, 1);
}
else{
debug(("'%s' out of context in state %d", gi, m->state));
m->taglvl--;
return SGML_EMPTY;
}
break;
case ParaLine:
if(!strcmp(gi, "A")){
fprintf(m->out,
" <Font\n"
" <FTag `%s'>\n"
" >\n", gi);
marker(m, attributes, nattrs);
STATE(m, Font, m->literal, 0);
return e->content = SGML_MIXED;
}
else if(taglevel == Font){
fprintf(m->out,
" <Font\n"
" <FTag `%s'>\n"
" >\n"
, gi);
STATE(m, Font, m->literal, 0);
return e->content = SGML_MIXED;
}
else if(!strcmp(gi, "P")){
m->taglvl--;
if(!m->empty)
fprintf(m->out,
" > # End ParaLine\n"
" > # End Para\n");
STATE(m, TextFlow, 0, 1);
return SGML_EMPTY;
}
else if(!strcmp(gi, "DT") ||
!strcmp(gi, "LI")){
m->taglvl--;
if(!m->empty)
fprintf(m->out,
" > # End ParaLine\n"
" > # End Para\n"
" <Para\n"
" <ParaLine\n");
m->empty = 1;
m->needspace = 0;
return SGML_EMPTY;
}
else if(!strcmp(gi, "DD")){
fprintf(m->out,
" <Char Tab>\n");
m->taglvl--;
return SGML_EMPTY;
}
else if(taglevel = ParaLine){
debug(("'%s' start tag: back to TextFlow state\n", gi));
fprintf(m->out,
" > # End of ParaLine\n"
" > # End of Para\n"
);
STATE(m, TextFlow, 0, 1);
}
else{
debug(("'%s' out of context in state %d", gi, m->state));
m->taglvl--;
return SGML_EMPTY;
}
break;
default:
debug(("state %d unexpected (<%s>)\n", m->state, gi));
m->taglvl--;
return SGML_EMPTY;
}
}
}
/* end_tag -- handle an element end tag.
 *
 *   document  the writer (a MIF* behind the HMDoc* interface)
 *   gi        name of the element being closed
 *
 * Searches the element stack from the top for the element to close: the
 * first entry whose name equals gi, or the first entry with RCDATA/CDATA
 * content (such an element is closed by whatever end tag arrives). If
 * nothing matches, the tag is reported through debug() and ignored.
 *
 * Every entry from the top down to and including the match is then popped.
 * For each popped entry the MIF construct belonging to the *current output
 * state* is closed and the state steps one level outwards:
 *   VariableDef -> VariableFormats -> MIFFile
 *   Font -> ParaLine -> TextFlow -> MIFFile
 * A pop in the MIFFile state only produces a debug() message.
 */
static VOID
end_tag(document, gi)
     HMDoc* document;
     CONST char* gi;
{
  MIF* m = (MIF*)document;
  int i;

  for(i = m->taglvl - 1; i>=0; i--){
    debug(("found </%s>. stack has %s\n", gi, m->stack[i].gi));
    if(m->stack[i].content == SGML_RCDATA ||
       m->stack[i].content == SGML_CDATA ||
       !strcmp(gi, m->stack[i].gi))
      break;
  }

  if(i < 0){
    debug(("Parse error: '%s' end tag with no such element open.\n", gi));
    return;
  }

  while(m->taglvl > i){
    m->taglvl--;

    switch(m->state){
    case VariableDef:
      fprintf(m->out,
              " > #End of VariableFormat\n");
      STATE(m, VariableFormats, 0, 1);
      break;

    case VariableFormats:
      fprintf(m->out,
              "> #End of VariableFormats\n");
      STATE(m, MIFFile, 0, 1);
      break;

    case TextFlow:
      fprintf(m->out,
              "> # End of TextFlow\n");
      STATE(m, MIFFile, 0, 1);
      break;

    case ParaLine:
      fprintf(m->out,
              " > # End of ParaLine\n"
              " > # End of Para\n");
      STATE(m, TextFlow, 0, 1);
      break;

    case Font:
      /* an empty FTag switches back to the paragraph's default font */
      fprintf(m->out,
              " <Font\n"
              " <FTag `'>\n"
              " > # End of Font\n");
      STATE(m, ParaLine, m->literal, 0);
      break;

    default:
      debug(("'%s' end tag unexpected in state %d.", gi, m->state));
    }
  }
}